/*
 * This code is adopted from SOCS
 */
#include "ParameterList.h"
#ifdef _OPENMP
#include <omp.h>
#endif
using namespace std;

MappingOpts::MappingOpts(void)
{
    this->setDefaults();
}

MappingOpts::~MappingOpts(void)
{
    ;
}

void MappingOpts::setDefaults(void)
{
    this->fullCommand = string("perm");
    this->readLength = DEFAULT_READ_LENGTH;
    this->readLength2 = DEFAULT_READ_LENGTH;
    // Mapping criteria
    this->ambiguousDiffThreshold = 0;
    this->maxAlignPerRead = DEFAULT_MAPPING_PER_READ;
    // int mismatchScoreThreshold = -1; // No filtering by sum of mismatch score
    this->subDiffThreshold = 2;
    this->subDiffThreshold2 = 2;
    this->truncatedReadPrefix = 0;
    // Output all the best alignments in terms of # of mismatches by default.
    this->bMap2ForwardStrandOnly = false;
    this->bMap2ReverseStrandOnly = false;
    this->bExcludeAmbiguousReads = false;
    this->bGetAllAlignments = false;
    this->bIgnoreQS = false;
    this->bPrintAlignments = true;
    this->bPrintAmbiguousReadsOnly = false;
    this->bPrintBadReads = false;
    this->bPrintFirstAlignmentOnly = false;
    this->bPrintNM = false;
    this->bPrintNH = false;
    this->bPrintSamHeader = true;
    this->bPrintAmbigReadsSeparately = false;
    this->bPrintUnMappedReads = false;
    this->bPrintAmbigReadsInOneLine = false;
    this->bZipOutput = false;
    // I/O.
    this->readtag_delimiter = ',';
    strcpy(this->readsFileFormat, "");
    strcpy(this->logFileN, "");
    strcpy(this->outputDir, "");
    strcpy(this->outputFileN, "");
    strcpy(this->outputFormat, "");
    strcpy(this->readsFileFormat, "");
    strcpy(this->ambiguousReadFileN, "");
    strcpy(this->badReadFileN, "");
    strcpy(this->unmappedFileN, "");
    // mate-pairs
    this->bExcludeAmbiguousPaired = false;
    this->bPrintBestPaired = false;
    this->disUB = DEFAULT_UPPER_BOUND;
    this->disLB = 0;
    this->frOnly = false; // Paired end can only aligned to the same strand
    this->ffOnly = false; // Paired end can only aligned to the same strand.
    this->bPrintRef4PairedInMapping = false;
    this->bPrintPairedRQ = false;
    // performance
#ifdef _OPENMP // Parallelization 
    this->maxThreadNum = omp_get_num_procs();
#else
    this->maxThreadNum = 1;
#endif // Because OpenMP 2.5 only support signed integer loop. Case the uiSize. The number of bucket is limited to signed int  	
}

void MappingOpts::clearOutputFileName(bool clear)
{
    if (clear) {
        strcpy(this->outputFileN, "");
        strcpy(this->unmappedFileN, "");
    }
}

ParameterList::ParameterList()
{
    setDefaults();
}

void ParameterList::setDefaults()
{
    this->validFlag = true;
    this->refFormat == "unknown";
    strcpy(refFile, "");
    strcpy(readsFile, "");
    strcpy(this->indexFileN, "");
    strcpy(this->seedName, "");
    strcpy(this->seedName2, "");
    // Index
    this->bMaskedMathRepeat = false;
    this->bMakeIndex = false;
    this->bSaveIndex = false;
    // Mate-pair
    this->bMatePairedReads = false;
    strcpy(this->matePairFileN1, "");
    strcpy(this->matePairFileN2, "");
}

// int selectSeedByReadLength(ParameterList& P)
int selectSeedByReadLength(unsigned int readLength, bool bMappedSOLiDRead, unsigned int subDiffThreshold)
{
    if (subDiffThreshold == 0 && readLength < 30) {
        return(0);
    }
    if (bMappedSOLiDRead) {
        if (readLength <= 25) {
            return(2);
        } else if (readLength <= 35) {
            return(11);
        }
    } else {
        bool bMappedLongRead = isReadProperLong(readLength);
        if (readLength <= 35) {
            if (readLength < 25 || subDiffThreshold < 2) {
                return(1);
            } else {
                return(2);
            }
        } else if (bMappedLongRead) {
            int potentialSeedId = subDiffThreshold / 2;
            if (potentialSeedId <= 4 && potentialSeedId > 0) {
                return(potentialSeedId);
            }
        }
    }
    if (subDiffThreshold <= 2) {
        return(2);
    } else {
        return(3);
    }
}

// This function decide options base on the ext name of some options
int selectSeed(string& seed)
{
    if ( seed == "F0") {
        return(0);
    } else if ( seed == "F1") {
        return(1);
    } else if ( seed == "F2") {
        return(2);
    } else if (seed == "S11") {
        return(11);
    } else if (seed == "F3") {
        return(3);
    } else if (seed == "F4") {
        return(4);
    } else if (seed == "S20") {
        return(20);
    } else if (seed == "S12") {
        return(12);
    } else {
        // current don't provide higher full sensitivity seed
        return(-1);
    }
}

// This function decide options base on the ext name of some options
void selectSeed(ParameterList& P)
{
    string seed1(P.seedName);
    string seed2(P.seedName2);
    P.seedId = selectSeed(seed1);
    P.seedId2 = selectSeed(seed2);
    if(P.seedId < 0)  {
        P.seedId = selectSeedByReadLength(P.readLength, P.bMappedSOLiDRead, P.subDiffThreshold);
    }
    if(P.seedId2 < 0)  {
        P.seedId2 = selectSeedByReadLength(P.readLength2, P.bMappedSOLiDRead, P.subDiffThreshold2);
    }
}

inline string fileFormatSymbol2String(char cFileFormatSymbol)
{
    string fileFormat;
    switch (cFileFormatSymbol) {
    case 'Q':
        fileFormat = string("csfastq");
        return(fileFormat);
    case 'q':
        fileFormat = string("fastq");
        return(fileFormat);
    case 'S':
        fileFormat = string("csfasta");
        return(fileFormat);
    default:
        fileFormat = string("fasta");
        return(fileFormat);
    }
}

// Print out the setting info
void ParameterList::printSetting(void)
{
    // Read type, allowing mismatches, allowing ambiguous read
    printf("Options Info:\n");
    if (this->bMappedSOLiDRead) {
        printf("Expect reads in color space.\n");
    }

    string fileType = fileFormatSymbol2String(this->cFileFormatSymbol);
    printf("Reads are processed as in %s format.\n", fileType.c_str());
    if (this->bDiscardReadWithN) {
        printf("Reads with 'N' or unknown characters will be discarded.\n");
    } else if (this->allowedNumOfNinRead > 0) {
        printf("Reads with %d 'N' or unknown characters will be discarded.\n", this->allowedNumOfNinRead);
    }
    printf("Results for reads that map to more than %d locations will not be reported.\n", this->maxAlignPerRead);
    if (this->truncatedReadPrefix > 0) {
        printf("The first %u bases will be excluded as barcode.\n", this->truncatedReadPrefix);
    }
    printf("The effective read length is %u.\n", this->readLength);
    printf("%d mismatches are allowed in the length.\n", this->subDiffThreshold);

    if (this->bPrintAmbiguousReadsOnly) {
        printf("Only reads mapped to multiple locations will be printed.\n");
    } else if (this->bExcludeAmbiguousReads) {
        printf("Reads mapped to multiple locations will be exclueded.\n");
    } else if (this->bGetAllAlignments && !this->bExcludeAmbiguousReads) {
        printf("Reads which have alignments within %u mismatches will be counted as mapped.\n", this->subDiffThreshold);
        printf("Reads which have more than %u alignments won't be printed.\n", this->maxAlignPerRead);
    } else {
        printf("Alignments with minimum mismatches will be collected.\n");
    }
    if(this->bMap2ForwardStrandOnly) {
        printf("Reads are mapped to only the forward strand.\n");
    }
    if (this->bMap2ReverseStrandOnly) {
        printf("Reads are mapped to only the reverse strand.\n");
    }
    if(this->bMap2ForwardStrandOnly && this->bMap2ReverseStrandOnly) {
        printf("Conflict!\n");
    }
    cout << endl;
}

// This function decide options base on the ext name of some options
void ParameterList::getOptsByCheckingExtName(void)
{
    // fixed the --readsFormat input
    char c = this->readsFileFormat[0];
    if (c != '\0' && c != '.') {
        strcpy(&this->readsFileFormat[1], string(this->readsFileFormat).c_str());
        this->readsFileFormat[0] = '.';
    }

    // choose the output format by ext name
    // Also check .SAM .Mapping or .fastq (Not case sensitive)
    if ((hasTheExtName(this->outputFileN, ".sam"))) {
        if (this->outputFormat[0] == '\0') {
            strcpy(this->outputFormat, "sam");
        } else if (strcmp(this->outputFormat,"sam") != 0 && strcmp(this->outputFormat,"SAM")) {
            LOG_INFO("Info %d: Conflict between ext file and --outputformat %s!\n", WARNING_LOG, this->outputFormat);
        }
    }
    if (hasTheExtName(this->outputFileN, ".mapping")) {
        if (this->outputFormat[0] == '\0') {
            strcpy(this->outputFormat, "mapping");
        } else if (strcmp(this->outputFormat,"mapping") != 0 && strcmp(this->outputFormat,"MAPPING") != 0) {
            LOG_INFO("Info %d: Conflict between ext file and --outputformat %s!\n", WARNING_LOG, this->outputFormat);
        }
    }
    if (hasTheExtName(this->outputFileN, ".fastq") || hasTheExtName(this->outputFileN, ".fq") ||
            hasTheExtName(this->outputFileN, ".FASTQ") || hasTheExtName(this->outputFileN, ".FQ") ) {
        if (this->outputFormat[0] == '\0') {
            strcpy(this->outputFormat, "fastq");
        } else if (strcmp(this->outputFormat,"fastq") != 0 && strcmp(this->outputFormat,"FASTQ") != 0 &&
                   strcmp(this->outputFormat,"fq") != 0 && strcmp(this->outputFormat,"FQ") != 0) {
            LOG_INFO("Info %d: Conflict between ext file and --outputformat %s!\n", WARNING_LOG, this->outputFormat);
        }
    }
}

bool ParameterList::checkRefValidity(void)
{
    if (this->refFile != NULL && !fileExist(this->refFile)
            && (int)strlen(this->refFile) > 0) {
        LOG_INFO("Info %d: Reference file %s is not found!\n",\
                 ERROR_LOG, this->refFile);
        this->validFlag = false;
    } else if (!(withFastaExtFileName(this->refFile) || \
                 hasTheExtName(this->refFile, ".dat") || \
                 hasTheExtName(this->refFile, ".txt") || \
                 hasTheExtName(this->refFile, ".index")) &&\
               !(this->refFormat == "fasta" ||\
                 this->refFormat == "list" ||\
                 this->refFormat == "index" )) {
        LOG_INFO("\nInfo %d: Reference %s has an unexpected ext name.\n",\
                 ERROR_LOG, this->readsFile);
        this->validFlag = false;
        printSynopsis();
    }
    return(this->validFlag);
}

bool ParameterList::isReadLengthValid(void)
{
    if (isReadTooLong(this->readLength) || isReadTooLong(this->readLength2)) {
        // Consider to truncated read automatically
        LOG_INFO("\nInfo %d: Read length %d is longer than the maximum read length %d.\n",\
                 ERROR_LOG, this->readLength, MAX_READ_LENGTH * 2);
        LOG_INFO("\nInfo %d: Use -T # option to keep only the first # bases of the read.\n", ERROR_LOG);
        return(false);
    } else if (isReadTooLong(this->anchorLength)) {
        LOG_INFO("Info %d: Read length %d could be too short!\n", INFO_LOG, this->readLength);
        return(false);
    } else if(isReadTooLong(this->anchorLength2)) {
        LOG_INFO("Info %d: Read length (2nd-end) %d could be too short!\n", INFO_LOG, this->readLength2);
        return(false);
    } else {
        return(true);
    }
}

// TODO: get proper truncatReadLength for different read length
bool ParameterList::truncatReadLength(void)
{
    // Truncate reads if opts are specified
    bool bTruncateRead = this->truncatedReadPrefix > 0;
    // Ex: if -t 5 -T 40 for a line with 50 char, cut the reads [5-44]
    bTruncateRead |= (this->readLength > (this->truncatedReadLength + this->truncatedReadPrefix));
    // this->bMatePairedReads has a special format fro 5'-3' 3'-5' (Don't truncate in that case)
    if (bTruncateRead) {
        if (this->readLength > this->truncatedReadPrefix) {
            this->readLength = min(this->truncatedReadLength, this->readLength - this->truncatedReadPrefix);
            this->readLength2 = min(this->truncatedReadLength, this->readLength2 - this->truncatedReadPrefix);
        }
        if (this->truncatedReadPrefix > 0) {
            LOG_INFO("Info %d: The first %d bp of the read will be ignored.\n", INFO_LOG, this->truncatedReadPrefix);
        }
        LOG_INFO("Info %d: The following %d bp will be kept.\n", INFO_LOG, this->readLength);
    }

    this->anchorLength = this->readLength;
    this->anchorLength2 = this->readLength2;
    if (isReadLengthValid()) {
        if (isReadProperLong(this->readLength)) {
            /*
            if (this->bMappedSOLiDRead) {
            	this->readLength = min(MAX_READ_LENGTH, this->readLength); // Truncated to read length 64 for SOLiD read.
            	this->anchorLength = min(MAX_READ_LENGTH, this->readLength); // Truncated to read length 64 for SOLiD read.
            } else */ {
                this->bMappedLongRead = true; // long reads in Illumina
                // when build up the table, use half read length as seed.
                if (this->readLength % 2 == 1) {
                    this->bOddReadLengthAndLongRead = true;
                    this->anchorLength = (this->readLength + 1) / 2;
                } else {
                    this->anchorLength = this->readLength / 2;
                }
            }
        }
        if(isReadProperLong(this->readLength2)) {
            /* if (this->bMappedSOLiDRead) {
            	this->readLength2 = min(MAX_READ_LENGTH, this->readLength2); // Truncated to read length 64 for SOLiD read.
            	this->anchorLength2 = min(MAX_READ_LENGTH, this->readLength2); // Truncated to read length 64 for SOLiD read.
            } else */ {
                this->bMappedLongRead2 = true; // long reads in Illumina
                // when build up the table, use half read length as seed.
                if (this->readLength2 % 2 == 1) {
                    this->bOddReadLengthAndLongRead2 = true;
                    this->anchorLength2 = (this->readLength2 + 1) / 2;
                } else {
                    this->anchorLength2 = this->readLength2 / 2;
                }
            }
        }
        // TODO comment the line after modify PerM to take more general paired read lengths
        this->truncatReadLength4LongShortPairedRead();
        return(true);
    } else {
        return(false);
    }
}

bool ParameterList::truncatReadLength4LongShortPairedRead(void)
{
    // TODO: double check if the readLength and anchorLength are set correctly.
    if(isReadProperLong(this->readLength) && isReadShort(this->readLength2)) {
        if(isReadProperLong(2*this->readLength2)) {
            if(2*this->readLength2 < this->readLength) {
                LOG_INFO("Info %d: 1st-end read will be truncated from %d bp to %d bp!\n", INFO_LOG, this->readLength, 2*this->readLength2);
                // EX: end1 has 75 bp, end2 has 35 bp. end1 will be truncated to 70 bp.
                this->anchorLength = this->readLength2;
                this->readLength = 2*this->anchorLength;
                this->bOddReadLengthAndLongRead = false;
            } else if(2*this->readLength2 > this->readLength) {
                LOG_INFO("Info %d: 2nd-end read will be truncated from %d bp to %d bp!\n", INFO_LOG, this->readLength2, this->readLength/2);
                // EX: end1 has 65 bp, end2 has 35 bp. end2 will be truncated to 33 bp.
                this->readLength2 = (this->readLength + 1) / 2;
                this->anchorLength2 = this->readLength2;
                this->bOddReadLengthAndLongRead = true;
            }
        }
    }
    return(false);
}

unsigned int nonSpaceCharCount(const char* str)
{
    unsigned int returnValue = 0;
    for (int i = 0; str[i] != '\0'; i++) {
        if (!(isspace(str[i]))) {
            returnValue++;
        }
    }
    return(returnValue);
}

// Return the read length given a line in the reads input file
unsigned int getReadLength(char* line, char cFileType)
{
    unsigned int readLength = 0;
    switch (cFileType) {
    case 'F': // Given the reads file are fasta format
    case 'q':
        readLength = (unsigned int) nonSpaceCharCount(line);
        return (readLength);
    case 'S': // Given the reads file are SOLiD format
    case 'Q':
        readLength = (unsigned int) nonSpaceCharCount(line) - 1;
        return (readLength);
    default:
        return DEFAULT_READ_LENGTH;
    }
}

bool isSkipLines(const char* line)
{
    return(!isNucleotide(line[0]));
}

// Must add new reads estimate function to support other reads format.
unsigned int getReadLength(const char* readFile, char expFileFormat)
{
    char cFileType;
    if (expFileFormat != 'N') {
        cFileType = expFileFormat;
    } else {
        cFileType = getReadsFileFormatSymbol(readFile);
    }

    ifstream ifile(readFile);
    char seqLine[MAX_LINE];
    do {
        seqLine[0] = EOF;
        ifile.getline(seqLine, MAX_LINE, '\n');
        if (seqLine[0] == EOF) {
            LOG_INFO("\nInfo %d: The read length can not be decided from the input format.\n", WARNING_LOG);
            return(DEFAULT_READ_LENGTH);
        }
    } while (isSkipLines(seqLine));

    unsigned int readLength = getReadLength(seqLine, cFileType);
    // Add the extra read length if it is printed in the following second line.
    if (readLength >= MAX_READ_LENGTH) {
        ifile.getline(seqLine, MAX_LINE, '\n');
        if (!(isSkipLines(seqLine))) {
            readLength += getReadLength(seqLine, cFileType);
        }
    }
    return (readLength);
}

void printSingleSynopsis(void)
{
    cout << "For single end:" << endl;
    cout << "perm <reference or index path> <read length or reads path > <-flag options>" << endl;
    cout << "Ex:  perm Ref.fasta Reads.fa -v 5 -o out.sam " << endl;
    cout << "Ex:  perm RefFilesList.txt ReadsFileList.txt -v 3 -A -m -s my.index" << endl;
    cout << "Ex:  perm Ref.index ReadsSetFilesList.txt -E > my.log" << endl;
    cout << endl;
}

void printPairedSynopsis(void)
{
    cout << "For paired-end reads:" << endl;
    cout << "perm <reference or index path> -1 <forward reads> -2 <reverse reads> <-flag options>" << endl;
    cout << "Ex:  perm chrM.fa  -1 F3.fq -2 R3.fq -U 500 -L 200 -o out.sam." << endl;
    cout << "Ex:  perm hg18.txt -1 F3.csfasta -2 R3.csfasta -U 500 -L 0 -e -m -s hg18.index" << endl;
    cout << "Ex:  perm Transcriptom.index -1 F3.fa -2 R3.fa" << endl;
    cout << endl;
}

void printBuildInDexInfo(void)
{
    cout << "For build index only:" << endl;
    cout << "perm <reference> <read length> <format> <seed> <index path>" << endl;
    cout << "Ex:  perm hg18.fa 50 --readFormat csfasta --seed F3 -s 50F3hg18Solid.index" << endl;
    cout << "Ex:  perm hg18.txt 50 --readFormat fastq --seed F2 -s 50F2hg18Illumina.index " << endl;
    cout << endl;
}

void printSingleEndExample(void)
{
    cout << "Ex:  ./perm Ref.fa Reads.fa -v 5 -E -T 36 -m -s -o out.sam" << endl;
    cout << "PerM will report only the uniquely mapped reads within 5 mismatches in the first 36 bases." << endl;
    cout << "PerM will reconstruct and save the indexes to the default path for future mappings." << endl;
    cout << "PerM will output the mappings file to out.sam in the SAM format." << endl;
    cout << endl;
}

void printPairedEndExample(void)
{
    cout << "Ex:  ./PerM -1 F3.fq -2 R3.fq -U 500 -L 200 -m -s -o out.sam." << endl;
    cout << "PerM will report only the non-ambiguous mate pair in the range of [200bp, 500bp]" << endl;
    cout << "PerM will reconstruct and save the indexes to the default path for future mappings." << endl;
    cout << "PerM will output the mappings file to out.sam in the SAM format." << endl;
    cout << endl;
}

void printIoInfo(void)
{
    cout << "<Reference Input>" << endl;
    cout << "The reference file should be in the fasta format, with *.fa or *.fasta ext filename." << endl;
    cout << "It can be transcriptom, with multiple seqs separated by lines start with '>' and the ref seq name." << endl;
    cout << "If you have multiple files, you can put each file path a line in a .txt file." << endl;
    cout << "ex: chr1.fa\\n chr2.fa\\n .. etc, with each path in one row." << endl;
    cout << "PerM also takes the txt file as ref, a file listing paths allreference paths." << endl;
    cout << "PerM parses a ref file according to it's ext name, which can be overwritten by --refFormat flag."<< endl;
    cout << endl;
    cout << "<Reads Input>" << endl;
    cout << "The reads file should be in the .fasta, .fastq or .csfasta format." << endl;
    cout << "PerM parses a reads file according to it's ext name, which can be overwritten by --readFormat flag."<< endl;
    cout << "Large reads file can be split to small files. Make *.txt file with each line a path of the reads." << endl;
    cout << "Use the txt file as the reads input so PerM can map reads set in parallels." << endl;
    cout << endl;

    cout << "<Mapping output>" << endl;
    cout << "PerM's default output format has the ext name *.mapping." << endl;
    cout << "To output in SAM format, specify flag -o path.sam, instead of -o path.mapping." << endl;
    cout << endl;
}

void printDefault(void)
{
    cout << "When you type:" << endl;
    cout << "\tperm <ref> <reads> for single end read mapping, or" << endl;
    cout << "\tperm <ref> -1 <readFile1> -2 <readFile2> for paired reads maping" << endl;
    cout << endl;
    cout << "PerM uses its default options, which can be overwritten by the options flags." << endl;
    cout << "The min and max of pair-end separations are 0 and 1Mb in a same ref, respectively" << endl;
    cout << "By default, PerM outputs all the best alignments in terms of the mismatches number." << endl;
    cout << "It throw away all reads with 'N' or '.' unless --includeReadsWN is specified" << endl;
    cout << "The default seeds varied in different the read lengthes and types." << endl;
    cout << "It won't save the index if -s is not specified." << endl;
    cout << "When -s is specified but not follow a path, a default filename is used to save the index." << endl;
    cout << "That path will be test for index reused for the mapping next time." << endl;
    cout << "The I/O formats are decided by the the ext name (.sam or .mapping) of the specified I/O file." << endl;
    cout << "unless special flags are set to overwrite the guess." << endl;
    cout << endl;
}

void printSingleEndOptions(void)
{
    cout << "Options for both single and pair-end:" << endl;
    cout << "The default is to output all best alignments in terms of # of mismatches." << endl;
    cout << "-A     Output all alignments within the mismatch threshold (see -v option), end-to-end." << endl;
    cout << "-E     Output only uniquely mapped reads remaining after the best down selection has been applied if applicable." << endl;
    cout << "-E -A  Output only the uniquely mapped reads" << endl;
    cout << "   ex: a read with a exact match and one mismatch, it is unique under only -E" << endl;
    cout << "   but not unique under -E -A." << endl;
    // cout << "-a Print out ambiguous reads only. Those uniquely mapped reads will be ignored" << endl;
    cout << "-v Int Maximum number of mismatches allowed (or allowed in each end for pair-end reads)." << endl;
    cout << "" << endl;
    cout << "-t Number of bases at the 5' end of each read to ignore. For example, if the first 5 bases are used as a barcode or to index multiple samples together, use -t 5. If not specified, no initial bases will be ignored. " << endl;
    cout << "-T Int Number of bases in each read to use, starting after any bases ignored by -t option. " << endl;
    cout << "   Later bases at the 3' of the read are ignored. For example, -T 30 means use only first 30 bases (signals) after the any bases ignored due to the -t option." << endl;
    cout << "-k Int The naxmimun num of mapping per read. The default is " << DEFAULT_MAPPING_PER_READ << '.' << "The maximun is " << MAX_MAP_NO_PER_READ << "." << endl;
    cout << "   reads mapped to more than that location won't be printed by default, unless flag --ambiguosReadOnly is set." << endl;
    cout << "-m     Create the reference index without reusing the saved index even if available. " << endl;
    cout << "-s Path   Save the reference index to accelerate the mapping in the future. If path is not specified, the default path will be used. " << endl;
    cout << "-o Path   Name of mapping output file when mapping a single read set. " << endl;
    cout << "   The output file format will be either the .mapping tab delimited text format or the SAM format as determined by the extension of the output filename." << endl;
    cout << "-d Path   Output directory for mapping output files when mapping multiple read sets (output files will be named automatically)." << endl;
    cout << "-a Path   Create a FASTA (FASTQ) file for reads mapped to more positions than the threshold specified by -k or the default of 200. " << endl;
    cout << "-b Path   Create a FASTA (FASTAQ) file of bad reads or reads shorter than expected." << endl;
    cout << "-u Path   Create a FASTA (FASTAQ) file of unmapped reads." << endl;
    cout << "-u When multiple read sets are mapped, filename is irrelevant and should be omitted." << endl;
    cout << "   the files of unmapped sequences will automatically be named and created in the directory PerM is run from." << endl;
    cout << "If only -a -u or -o is specified without giving a path, a default path will be used instead." << endl;
    cout << "--log Path	Output the mapping count to a specified file" << endl;
    cout << endl;
    cout << "--forwardOnly Map reads to the forward strand only: (This is for SOLiD Strand specific sequencing) " << endl;
    cout << "--reverseOnly Map reads to the reverse strand only: (This is for SOLiD Strand specific sequencing) " << endl;
    cout << "--forwardOnly and --reverseOnly shouldn't use together" << endl;
    cout << "--ambiguosReadOnly Output only ambiguous mapping to find repeats (similar regions within substitution threshold)" << endl;
    cout << "--ambiguosReadInOneLine Output reads mapped to more than k places in one line" << endl;
    cout << ". when this option is specified, reads that mapped to over mapping number threshold that specified by -k will still be printed." << endl;
    cout << "--noSamHeader Do not print the SAM header. This makes it easier to concatenate multiple SAM output files." << endl;
    cout << "--includeReadsWN Int Map reads with at most given number of N or '.' bases by encoding N or '.' as A or 3." << endl;
    cout << "   Normally such reads are ignored. " << endl;
    cout << "--statsOnly Output the mapping statistics to stdout only, without saving alignments to files. " << endl;
    cout << "--ignoreQS Ignore the quality scores in fastq or QUAL files." << endl;
    cout << "--printNM  When quality scores are available, use this flag to print number of mismatches, instead of mismatch scores in mapping format. " << endl;
    cout << "--printNH  Report alignments that contains the query in the current record in the sam format" << endl;
    cout << "--delimiter 'c', where c is the delimiter of the read id, for strange format of read files" << endl;
    cout << "--seed {F2 | S11 | F3 | F4}. Specify the seed pattern. The F0, F1, F2, F3, and F4 seeds are fully sensitive to 0-4 mismatches respectively." << endl;
    cout << "The S11 and S12 seeds are fully sensitive to one biological mismatch (SNP) and one or two SOLiD color mismatches respectively" << endl;
    cout << "See http://bioinformatics.oxfordjournals.org/cgi/content/abstract/25/19/2514" << endl;
    cout << "--refFormat {fasta | list | index }. Take refs file in the specified format," << endl;
    cout << "instead of gussing according to its ext name." << endl;
    cout << "--readFormat {fasta | fastq | csfasta | csfastq}. Take reads file in the specified format," << endl;
    cout << "instead of gussing according to its ext name." << endl;
    cout << "--outputFormat {sam | mapping | fastq}. Output mapping in the specified format," << endl;
    cout << "instead of gussing according to its ext name." << endl;
    cout << endl;
    printSingleEndExample();
}

void printPairedEndOptions(void)
{
    cout << "Options for mate-paired reads:" << endl;
    cout << "-L I   lower bound for mate-paired separation distance" << endl;
    cout << "-U I   upper bound for mate-paired separation distance" << endl;
    cout << "-e     Exclude ambiguous paired " << endl;
    cout << "-o P   where P is the path (single paired read set) or the directory (multiple paired read sets)" << endl;
    cout << "-v2 Int Maximum number of mismatches allowed in the 2nd end for pair-end reads." << endl;
    cout << "The default is set as -L 0 -U 1000000" << endl;
    cout << "--fr Map paired-end reads to different strand only" << endl;
    cout << "--ff Map paired-end reads to the same strand only" << endl;
    cout << "The default option collects paried-end mappings in both the same and difference strands" << endl;
    cout << "--printRefSeq Print the reference sequence of paired-end mapping in .mapping format" << endl;
    cout << "--printQual Print the read qual scores for paired-end mapping in .mapping format" << endl;
    cout << endl;
    printPairedEndExample();
}

void printOptionsInfo(void)
{
    cout << "Type \"perm single\" to see options for single end mapping" << endl;
    cout << "Type \"perm paired\" to see options for paired end mapping" << endl;
    /*
    printSingleEndOptions();
    printPairedEndOptions();
    */
}

void printSynopsis(void)
{
    cout << "PerM (Efficient read mapping with periodic full sensitive seeds)" << endl;
    cout << "Version: 0.4.1\n" << endl;
    cout << "Synopsis:" << endl;
    printSingleSynopsis();
    printPairedSynopsis();
    printBuildInDexInfo();
    cout << "Please type" << endl;
    cout << "perm io            (to see the I/O format)" << endl;
    cout << "perm default       (to see the default opt setting)" << endl;
    cout << "perm single        (to see options for single-end mapping)" << endl;
    cout << "perm paired        (to see options for paired-end mapping)" << endl;
    cout << "For more info, please check: http://code.google.com/p/perm/" << endl;
    cout << endl;
}

void printUsageInfo(string helepOpt)
{
    if (helepOpt == "io") {
        printIoInfo();
    } else if (helepOpt == "default") {
        printDefault();
    } else if (helepOpt == "options") {
        printOptionsInfo();
    } else if(helepOpt == "single") {
        printSingleEndOptions();
    } else if(helepOpt == "paired") {
        printPairedEndOptions();
    } else {
        printSynopsis();
    }
}

string getFullCommand(int argc, const char** argv)
{
    string fullCommand = string(argv[0]);
    int i;
    for (i = 1; i < argc; i++) {
        string tmpStr(argv[i]);
        fullCommand = fullCommand.append(" ").append(tmpStr);
    }
    return(fullCommand);
}

static int assignMismatchThresholdBySeed(const char* seedName, int threshold)
{
    if (strcmp(seedName, "F2") == 0) {
        threshold = 2;
    } else if (strcmp(seedName, "S11") == 0) {
        threshold = 3;
    } else if (strcmp(seedName, "F3") == 0) {
        threshold = 3;
    } else if (strcmp(seedName, "S12") == 0) {
        threshold = 4;
    } else if (strcmp(seedName, "F4") == 0) {
        threshold = 4;
    }
    return(threshold);
}

void getParameterList4PairedEnd(int argc, const char** argv, CFlags& f, ParameterList& p) 
{
        // for mate-pairs
        p.bExcludeAmbiguousPaired = f.checkArg(argc, argv, "-e");
        f.checkpStrOpt(argc, argv, "-1", p.matePairFileN1);
        f.checkpStrOpt(argc, argv, "-2", p.matePairFileN2);
        f.checkIntOpt(argc, argv, "--LowerBound", p.disLB);
        f.checkIntOpt(argc, argv, "-L", p.disLB);
        f.checkIntOpt(argc, argv, "--upperBound", p.disUB);
        f.checkIntOpt(argc, argv, "-U", p.disUB);
        // Pair end can only align to different strands
        p.frOnly = f.checkArg(argc, argv, "--fr");
        // Pair end can only align to the same strand.
        p.ffOnly = f.checkArg(argc, argv, "--ff");
        p.bPrintRef4PairedInMapping = f.checkArg(argc, argv, "--printRefSeq");
        p.bPrintPairedRQ = f.checkArg(argc, argv, "--printQual");
}

void getParameterList4OptionalOutput(int argc, const char** argv, CFlags& f, ParameterList& p)
{
        p.bPrintAmbigReadsSeparately = f.checkArg(argc, argv,  "-a");
        f.checkpStrOpt(argc, argv, "-a", p.ambiguousReadFileN);
        p.bPrintBadReads = f.checkArg(argc, argv,  "-b");
        f.checkpStrOpt(argc, argv, "-b", p.badReadFileN);
        p.bPrintUnMappedReads = f.checkArg(argc, argv,  "-u");
        p.bZipOutput= f.checkArg(argc, argv,  "-z");
        f.checkpStrOpt(argc, argv, "-u", p.unmappedFileN);
        f.checkpStrOpt(argc, argv, "-o", p.outputFileN);
        f.checkpStrOpt(argc, argv, "--outputFormat", p.outputFormat);
        p.bPrintSamHeader = !f.checkArg(argc, argv, "--noSamHeader");
        f.checkpStrOpt(argc, argv, "-d", p.outputDir);
        f.checkpStrOpt(argc, argv, "--log", p.logFileN);
}

void getParameterList4AmbiguousReadFiltering(int argc, const char** argv, CFlags& f, ParameterList& p)
{
        // The default is to output the best alignments in term of number of mismatches.
        p.bGetAllAlignments = f.checkArg(argc, argv, "-A");
        p.bExcludeAmbiguousReads = f.checkArg(argc, argv, "-E");
        p.bPrintBestPaired = f.checkArg(argc, argv, "-B");
        // Speicil hidden flag that print only ambiugous read. Should be used with -E
        p.bPrintAmbiguousReadsOnly = f.checkArg(argc, argv, "--ambiguosReadOnly");
        p.bPrintFirstAlignmentOnly = f.checkArg(argc, argv, "--1stAlignmentOnly");
        p.bPrintAmbigReadsInOneLine = f.checkArg(argc, argv, "--ambiguosReadInOneLine");

        f.checkIntOpt(argc, argv, "-k", p.maxAlignPerRead);
        if (p.maxAlignPerRead >= (int)MAX_MAP_NO_PER_READ) {
            p.maxAlignPerRead = (int)MAX_MAP_NO_PER_READ - 1; // Must save a space
        }
}

ParameterList getParameterList(int argc, const char** argv)
{
    ParameterList parameters;
    if (argc <= 1 || argv == NULL) {
        printSynopsis();
    } else if (argc == 2) {
        printUsageInfo(argv[1]);
    } else {
        parameters.fullCommand = getFullCommand(argc, argv);
        CFlags f;
        strcpy(parameters.refFile, argv[1]);
        f.checkpStrOpt(argc, argv, "--refFormat", parameters.refFormat);
        strcpy(parameters.readsFile, argv[2]);
        f.checkpStrOpt(argc, argv, "--readFormat", parameters.readsFileFormat);
        if (argc > 3) {
            int DEFAULT_VAR_T = parameters.subDiffThreshold;
            parameters.subDiffThreshold = atoi(argv[3]);
            if (argv[3][0] != '0' && atoi(argv[3]) == 0) {
                parameters.subDiffThreshold = DEFAULT_VAR_T; // Arg not a number
            }
        }
        bool bAssignSeed = f.checkpStrOpt(argc, argv, "--seed", parameters.seedName);
        if (bAssignSeed) {
            parameters.subDiffThreshold = assignMismatchThresholdBySeed(parameters.seedName, parameters.subDiffThreshold);
        }
        bool bAssignSeed2 = f.checkpStrOpt(argc, argv, "--seed2", parameters.seedName);
        if (bAssignSeed2) {
            parameters.subDiffThreshold2 = assignMismatchThresholdBySeed(parameters.seedName2, parameters.subDiffThreshold2);
        }
        f.checkIntOpt(argc, argv, "-v", parameters.subDiffThreshold);
        if(!f.checkIntOpt(argc, argv, "-v2", parameters.subDiffThreshold2)) {
            parameters.subDiffThreshold2 = parameters.subDiffThreshold;
        }
        f.checkIntOpt(argc, argv, "-q", parameters.mismatchScoreThreshold);

        // Special options to map strand specific reads to forward strand only
        parameters.bMap2ForwardStrandOnly = f.checkArg(argc, argv, "--forwardOnly");
        parameters.bMap2ReverseStrandOnly = f.checkArg(argc, argv, "--reverseOnly");

        // The default will try to used the saved index table, without attempting to save table.
        parameters.bMakeIndex = f.checkArg(argc, argv, "-m");
        parameters.bSaveIndex = f.checkArg(argc, argv, "-s");
        f.checkpStrOpt(argc, argv, "-s", parameters.indexFileN);

        f.checkUnIntOpt(argc, argv, "-T", parameters.truncatedReadLength);
        f.checkUnIntOpt(argc, argv, "-t", parameters.truncatedReadPrefix);
        // read filtering
        parameters.bDiscardReadWithN = !f.checkArg(argc, argv, "--includeReadsWN");
        if (!parameters.bDiscardReadWithN) {
            parameters.allowedNumOfNinRead = MAX_READ_LENGTH;
        }
        f.checkUnIntOpt(argc, argv, "--includeReadsWN", parameters.allowedNumOfNinRead);
        if (parameters.allowedNumOfNinRead > 0) {
            parameters.bDiscardReadWithN = false;
        }

        // truncate the read ID
        f.checkpCharOpt(argc, argv, "--delimiter", parameters.readtag_delimiter);
        // for quality score (incomplete)
        parameters.bPrintAlignments = !f.checkArg(argc, argv, "--statsOnly");
        parameters.bIgnoreQS = f.checkArg(argc, argv, "--ignoreQS");
        parameters.bPrintNM = f.checkArg(argc, argv, "--printNM");
        parameters.bPrintNH = f.checkArg(argc, argv, "--printNH");
        // Set max number of Multi-thread
        unsigned int noMaxTreadNo = parameters.maxThreadNum;
        if (f.checkUnIntOpt(argc, argv, "-p", noMaxTreadNo)) {
            if (noMaxTreadNo > 0) {
                parameters.maxThreadNum = min(noMaxTreadNo, parameters.maxThreadNum);
            }
        }
        getParameterList4AmbiguousReadFiltering(argc, argv, f, parameters);
        getParameterList4OptionalOutput(argc, argv, f, parameters);
        getParameterList4PairedEnd(argc, argv, f, parameters);
        // Check if the first parameter are options not file
        if (argv[1][0] == '-') {
            // use the last two options as ref and read
            strcpy(parameters.refFile, argv[argc - 2]);
            strcpy(parameters.readsFile, argv[argc - 1]);
        }
        f.checkUnrecognizedFlags(argc, argv);
        parameters.getOptsByCheckingExtName();
    }
    return(parameters);
}

bool withFastaExtFileName(const char* fileName)
{
    if (hasTheExtName(fileName, ".fasta") ||
            hasTheExtName(fileName, ".fna") ||
            hasTheExtName(fileName, ".mfa") ||
            hasTheExtName(fileName, ".fa")) {
        return (true);
    } else {
        return(false);
    }
}

bool withSupportExtFileName(const char* fileName)
{
    if (withFastaExtFileName(fileName) ||
            hasTheExtName(fileName, ".csfasta") ||
            hasTheExtName(fileName, ".csfa") ||
            hasTheExtName(fileName, ".fastqsanger") ||
            hasTheExtName(fileName, ".fastq") ||
            hasTheExtName(fileName, ".fq") ||
            hasTheExtName(fileName, ".csfastq") ||
            hasTheExtName(fileName, ".csfq")) {
        return (true);
    } else {
        return(false);
    }
}

/*
 * This function get the read set file names in a file list to vectors.
 * If reads are not paired, only readSetList1 will be filled.
 * If reads are paired, both readSetList1 and readSetList2 will be filled.
 */
bool getReadSetsFilenames(ParameterList &P, \
                          vector<string>& readSetList1,\
                          vector<string>& readSetList2)
{
    P.bMatePairedReads = false;
    if (fileExist(P.readsFile)) {
        if (hasTheExtName(P.readsFile, ".txt") || P.refFormat == "list") { // read files are in a list.
            LOG_INFO("Info %d: Reading the file as a read set list\n", FINE_LOG);
            char readSetFile1[FILENAME_MAX];
            char readSetFile2[FILENAME_MAX];
            /*
            ifstream readsFileList(P.readsFile);
            P.bMatePairedReads = GetNextFilenamePairFromListFile(readsFileList, readSetFile1, readSetFile2);
            readsFileList.close();
            */
            P.bMatePairedReads = GetNextFilenamePairFromListFile(P.readsFile, readSetFile1, readSetFile2);

            // When forward and backward paired reads are stored in separated files and listed in a row.
            if (P.bMatePairedReads) {
                ifstream readsFileList(P.readsFile);
                while (GetNextFilenamePairFromListFile(readsFileList, readSetFile1, readSetFile2)) {
                    readSetList1.push_back(readSetFile1);
                    readSetList2.push_back(readSetFile2);
                }
                readsFileList.close();
            } else {
                ifstream readsFileList(P.readsFile);
                while (GetNextFilenameFromListFile(readsFileList, readSetFile1)) {
                    readSetList1.push_back(readSetFile1);
                }
                readsFileList.close();
            }
        } else if (withSupportExtFileName(P.readsFile) ||
                   withSupportExtFileName(P.readsFileFormat)) {
            readSetList1.push_back(string(P.readsFile));
        } else {
            cout << " The reads file is not in a recognizable format (ext name).\n" << endl;
            return(false);
        }
    } else if (fileExist(P.matePairFileN1) && fileExist(P.matePairFileN2)) {
        if((hasTheExtName(P.matePairFileN1, ".txt") && hasTheExtName(P.matePairFileN2, ".txt")) || P.refFormat == "list") {
            ifstream readsFileList1(P.matePairFileN1);
            ifstream readsFileList2(P.matePairFileN2);
            char readSetFile1[FILENAME_MAX];
            char readSetFile2[FILENAME_MAX];
            while (GetNextFilenameFromListFile(readsFileList1, readSetFile1) &&
                    GetNextFilenameFromListFile(readsFileList2, readSetFile2)) {
                readSetList1.push_back(readSetFile1);
                readSetList2.push_back(readSetFile2);
            }
            readsFileList1.close();
            readsFileList2.close();
        } else if ((withSupportExtFileName(P.matePairFileN1)\
                    && withSupportExtFileName(P.matePairFileN2))\
                   || withSupportExtFileName(P.readsFileFormat)) {
            readSetList1.push_back(string(P.matePairFileN1));
            readSetList2.push_back(string(P.matePairFileN2));
            P.bMatePairedReads = true;
        } else {
            cout << " The reads files are not in a recognizable format (ext name).\n" << endl;
            return(false);
        }
    } else {
        if (P.matePairFileN1[0] != '\0' && !fileExist(P.matePairFileN1)) {
            printf("Can't open reads file %s.", P.matePairFileN1);
        }
        if (P.matePairFileN2[0] != '\0' && !fileExist(P.matePairFileN2)) {
            printf("Can't open reads file %s.", P.matePairFileN2);
        }
        if (atoi(P.readsFile) == 0 && !fileExist(P.readsFile)) {
            printf("Can't open reads file %s.", P.readsFile);
        }
        return(false);
    }
    return(true);
}

bool printOptWarning4PairedEndOpts(ParameterList &P)
{
    bool optIsCorrect = true;
    if(!P.bMatePairedReads) {
        if(P.disLB > 0) {
            cout << "Unexpected -L option for single end mapping " << endl;
            optIsCorrect = false;
        }
        if(P.disUB != (int)DEFAULT_UPPER_BOUND) {
            cout << "Unexpected -U option for single end mapping " << endl;
            optIsCorrect = false;
        }
        if(P.frOnly) {
            cout << "Unexpected --fr option for single end mapping " << endl;
            optIsCorrect = false;
        }
        /*
        if(P.rfOnly) {
        	cout << "Unexpected --rf option for single end mapping " << endl;
        	optIsCorrect = false;
        }
        */
        if(P.ffOnly) {
            cout << "Unexpected --ff option for single end mapping " << endl;
            optIsCorrect = false;
        }
        if(P.bExcludeAmbiguousPaired) {
            cout << "Unexpected --e option for single end mapping " << endl;
            optIsCorrect = false;
        }
    }
    return(optIsCorrect);
}

bool printOptWarning4SingleEndOpts(ParameterList &P)
{
    // TODO: have more strict test for single end options.
    bool optIsCorrect = true;
    if(P.bMatePairedReads) {
        if(P.bExcludeAmbiguousReads) {
            cout << "Unexpected -E option for paired-end mapping " << endl;
            optIsCorrect = false;
        }
    }
    /*
    if(P.bMappedLongRead && P.bMappedSOLiDRead) {
    	cout << "This version of PerM doesn't support SOLiD read longer than 64 bases/colors " << endl;
    	optIsCorrect = false;
    }*/
    return(optIsCorrect);
}


// return false if the ext name of readSet is not expected
bool checkFileListHasTheRightExt(vector<string>& readSetList)
{
    std::string extName;
    for ( vector<string>::iterator it = readSetList.begin();
            it != readSetList.end(); it++) {
        if ( it == readSetList.begin()) {
            extName = std::string(getExtName(it->c_str()));
        } else if ( *it == string("") ) {
            continue;
        } else {
            if (extName != std::string(getExtName(it->c_str()))) {
                cout << "Reads input file " << *it << " doesn't have the ext name " << extName << ".\n";
                return(false);
            }
        }
        if (!withSupportExtFileName(it->c_str())) {
            cout << "Reads input file " << *it << " has a unexpected ext name\n";
            return(false);
        }
    }
    return(true);
}

bool checkReadsSetNamesValidity(vector<string>& readSetsList1, vector<string>& readSetsList2)
{
    bool valid_flag = true;
    if (readSetsList1.size() == 0) {
        cout << "There are no read sets to mapped " << endl;
        return(false);
    } else {
        valid_flag = checkFileListHasTheRightExt(readSetsList1);
        if (valid_flag && readSetsList2.size() > 0) {
            if (readSetsList1.size() != readSetsList2.size()) {
                cout << "The paired end read set has different size" << endl;
                valid_flag = false;
            }
            valid_flag = checkFileListHasTheRightExt(readSetsList2);
        }
    }
    if (!valid_flag) {
        cout << "Please check the input reads files.\n" << endl;
    }
    return(valid_flag);
}


